/* ----------------------------------------------------------------------------- CPS_taxsim_main.do

This file creates a second taxunit variable which will identify the primary filers in the tax unit
(head and spouse in the case of married filing jointly), and dependent filers.

As a secondary task, this file also codes up income groups for the deduction imputation using a 
rough measure of AGI. 

Last updated (this file): 8/22/13

---------------------------------------------------------------------------------------------------
*/


// Rebuild guard: everything below runs only when the confirm fails (_rc!=0).
// NOTE(review): the trailing words "DO IT AGAIN" are not part of the file name, so this
// confirm looks like it is meant to fail on purpose and force a full rebuild -- confirm
// before "cleaning it up", because a plain file check would skip the rebuild.
cap confirm file CPS_taxsim_tukey2.dta DO IT AGAIN
if _rc!=0 {
	forvalues y=1986/2012 {

		// Load one yearly CPS extract and stamp it with the loop year
		use CPS_mar`y', clear
		qui gen yof=`y'

		// Harmonize variable names. The renames are attempted strictly in this order and
		// each one is skipped silently when it fails (so when both a_age and peage are
		// present, only the first of them ends up as age).
		local old_names "hhseq pppos a_age peage a_sex a_famrel"
		local new_names "h_seq ppos age age sex relhead"
		local n_names : word count `old_names'
		forvalues k=1/`n_names' {
			local from : word `k' of `old_names'
			local to : word `k' of `new_names'
			cap rename `from' `to'
		}
	// -------------------------------------------------- 1. Bring in the cell means
	// Attach the *_tc variables from CPS_cellmeans to each person and discard
	// cell-mean rows that have no counterpart in this year's file.

		cap drop _merge
		sort yof h_seq ppos
		qui merge 1:1 yof h_seq ppos using CPS_cellmeans.dta // copied from main directory $ddCPS
		tab yof _merge
		qui drop if _merge==2
		
		// Fill the topcode (_tc) values for non-topcoded individuals: wherever the _tc
		// version is "." but the variable of the same name without "_tc" is not missing,
		// copy that value in. Which variables are scanned depends on the year.
		if `y'<=1987 local tc_vars "i5*_tc pinctot_tc"
		else local tc_vars "*_*_tc ptotval_tc"
		foreach tc_name of varlist `tc_vars' {
			local raw_name=subinstr("`tc_name'","_tc","",.)
			qui replace `tc_name'=`raw_name' if `tc_name'==.&`raw_name'<.
		}
		drop _merge
		/* DO WE CARE ABOUT THIS?
		// Generate cell-mean consistent household income
		qui egen htotval_tc=total(ptotval_tc) if yof>1987, by(yof h_seq)
		qui egen hinctot_tc=total(pinctot_tc) if yof<1988, by(yof h_seq) 
		*/

		
	// -------------------------------------------------- 2. Non-income variables
		
		
		// Bring in the tax units. The assert requires that no person in memory is
		// missing from CPS_taxsim_tukey; rows that exist only in that file are dropped.
		cap drop _merge
		merge 1:1 yof h_seq ppos using CPS_taxsim_tukey
		assert _merge>1
		keep if _merge==3
		drop _merge

		// Identify filers/dependents
		if `y'>1987 {
			// From here on pinctot_tc holds ptotval_tc, so later code can use one name
			drop pinctot_tc
			rename ptotval_tc pinctot_tc

			// Add ern_val_tc to the component selected by ern_srce:
			// 1 -> ws_val_tc, 2 -> se_val_tc, 3 -> frm_val_tc.
			// (The original author's note: only 92.35% of SE income is taxed, but no
			//  adjustment is applied here; and frm is handled this way because the true
			//  se_val is already in household income.)
			local src_code = 0
			foreach earn_var in ws_val_tc se_val_tc frm_val_tc {
				local ++src_code
				qui replace `earn_var' = `earn_var' + ern_val_tc if ern_srce==`src_code'
			}
		}
	
		// First pass of the filer-status assignment. Everyone starts as -9 (unassigned)
		// and the rules below only ever touch people who are still -9, so the ORDER of
		// the replace statements matters.
		cap drop filer_status
		qui gen filer_status=-9
		#d ;
		label define filer_status
			-9 "Unassigned"
			 1 "Married, filing jointly"
			 2 "Single individual"
			 3 "Head of household (single)"
			 4 "Dependent filer"
			 5 "Dependent non-filer"
			 6 "Other non-filer";
		#d cr
		label val filer_status filer_status
		// If you are married, you are filing jointly with your spouse
		qui replace filer_status = 1 if married_now==1
			// Everyone else in that tax unit is a dependent
			// (temp2 flags tax units that contain at least one status-1 person)
			cap drop temp1
			cap drop temp2
			qui gen temp1 = filer_status==1
			qui egen temp2 = max(temp1), by(yof h_seq taxunit)
		// NOTE(review): these two lines test `pinctot`, not `pinctot_tc` as the rest of the
		// section does. Depending on the year this is either a separate variable or an
		// abbreviation -- confirm which one is intended. Also note a missing value
		// compares as >0 in Stata, so missing income would land in status 4.
		qui replace filer_status = 4 if filer_status==-9&temp2==1&pinctot>0
		qui replace filer_status = 5 if filer_status==-9&temp2==1&pinctot<=0
			drop temp*
		// Tax units of 1 are all single
			cap drop temp_N
			bys yof h_seq taxunit: gen temp_N=_N
		qui replace filer_status=2 if temp_N==1&filer_status==-9
			drop temp*		
		// Everyone else: highest (non-zero) earner is the filer, everyone else is dependent
			// temp_mostinc: person's pinctot_tc equals the largest non-zero pinctot_tc in the unit
			// temp_oldest : person's age equals the largest age in the unit
			// Both flags are reused further down, so they are NOT dropped here.
			cap drop temp_maxinc
			cap drop temp_nonzero
			qui gen temp_nonzero = pinctot_tc if pinctot_tc!=0
			qui egen temp_maxinc = max(temp_nonzero), by(yof h_seq taxunit)
			cap drop temp_mostinc 
			qui gen temp_mostinc = pinctot_tc==temp_maxinc
			cap drop temp_maxage
			qui egen temp_maxage = max(age), by(yof h_seq taxunit)
			cap drop temp_oldest 
			qui gen temp_oldest = age==temp_maxage
			// Diagnostic output only; it does not change the data
			compare temp_mostinc temp_oldest if filer_status==-9 // almost always the same
		// Head of household = still unassigned AND top earner AND oldest in the unit
		qui replace filer_status=3 if temp_mostinc==1&temp_oldest==1&filer_status==-9
		// If a tax unit still has several heads (status 3), it should normally be because
		// several people tie on both age and income.
			// temp1: person is currently a head; temp2: number of heads in the tax unit
			cap drop temp1
			cap drop temp2
			qui gen temp1 = filer_status==3
			qui egen temp2 = total(temp1), by(yof h_seq taxunit)
			* tab temp2
			cap assert pinctot_tc==0 if temp1==1&temp2>1
			if _rc!=0 {
				noisily {
					di "Multiple head problem is not the result of multiple 0's"
					list h_seq if temp1==1&temp2>1&pinctot_tc!=0
				}
				// In these cases, just give it to the first person (there's about 1/year)
				// - This appears to actually be the same thing as the duplicate observation
				// problem we ran into in the matching process
				//
				// temp3: person is one of the tied heads
				// temp4: ppos of the tied heads (missing for everyone else)
				// temp5: lowest ppos among the tied heads of the tax unit
				//
				// FIX: temp4 used to be filled "if temp2==1". Inside a problem unit temp2 is
				// always >1, so temp4/temp5 were missing and the replace below never changed
				// anyone; every tied head then fell through to status 6. The condition is now
				// temp3==1, which is what the (retired) case-by-case code did by hand.
				cap drop temp3
				cap drop temp4
				cap drop temp5
				qui gen temp3 = temp1==1&temp2>1
				qui gen temp4 = ppos if temp3==1
				qui egen temp5 = min(temp4), by(yof h_seq taxunit)
				qui replace filer_status=4 if temp3==1&ppos==temp5
	/*			if `y'==1987 { DON'T DO THIS CASE BY CASE ANYMORE
					replace filer_status=4 if h_seq==41412&ppos==2
				}
				if `y'==1988 {
					replace filer_status=4 if h_seq==10511&ppos==42
				} 
				*/
			}
			// Whoever is still a tied head at this point becomes an "Other non-filer" (6).
			// (An older comment here said "always 5"; the code has always assigned 6.)
		qui replace filer_status=6 if temp1==1&temp2>1&filer_status==3
		// Next we assign the dependents of HH heads
		// (second pass: as before, most rules only touch people who are still -9, so the
		//  order of the replace statements matters)
			// Now recalculate 3's
			// temp1: person is a head; temp2: number of heads in the tax unit
			cap drop temp1
			cap drop temp2
			qui gen temp1 = filer_status==3
			qui egen temp2 = total(temp1), by(yof h_seq taxunit)
			* tab temp2
			// Everyone in a 1 filer household who is not the filer is a dependent
			// NOTE(review): tests `pinctot`, not `pinctot_tc` -- confirm which is intended.
		qui replace filer_status=4 if temp1==0&temp2==1&filer_status==-9&pinctot>0
		qui replace filer_status=5 if temp1==0&temp2==1&filer_status==-9&pinctot<=0
		// If everyone in the TU has 0 income, then they're all non-filers
			cap drop temp_zeroinc
			cap drop temp_Nzeros
			cap drop temp_N
			cap drop temp_allzeros
			qui gen temp_zeroinc = pinctot_tc==0
			qui egen temp_Nzeros = total(temp_zeroinc), by(yof h_seq taxunit)
			bys yof h_seq taxunit: gen temp_N = _N
			qui gen temp_allzeros=temp_N==temp_Nzeros
		// This replace has no filer_status condition: it overrides any status assigned above
		qui replace filer_status=6 if temp_allzeros==1
	// At this point nearly everyone who is unassigned has no filer in the HH
	// They get the following treatment:
		// temp_Nfilers: number of status 1/2/3 people in the tax unit
		cap drop temp_filer
		qui gen temp_filer = inlist(filer_status,1,2,3)
		cap drop temp_Nfilers
		qui egen temp_Nfilers=total(temp_filer), by(yof h_seq taxunit)
		// No income? You are a non-filer
		qui replace filer_status=6 if pinctot_tc==0&filer_status==-9
		// Whomever has the highest income becomes the filer 
		// and everyone else in the TU is a dependent
		// (only when exactly one person in the unit carries the temp_mostinc flag)
		cap drop temp_Nhighe
		qui egen temp_Nhighe=total(temp_mostinc), by(yof h_seq taxunit)
		qui replace filer_status=3 if temp_Nhighe==1&temp_mostinc==1&filer_status==-9&temp_Nfilers==0
		// The cases left are children with parents present, parents have lower incomes, children have 
		// some kind of support (social security)
		// Filer counts are refreshed first, then the unique oldest person becomes the head
		cap drop temp_filer
		qui gen temp_filer = inlist(filer_status,1,2,3)
		cap drop temp_Nfilers
		qui egen temp_Nfilers=total(temp_filer), by(yof h_seq taxunit)
		cap drop temp_Noldest
		qui egen temp_Noldest=total(temp_oldest), by(yof h_seq taxunit)
		qui replace filer_status=3 if temp_Noldest==1&temp_oldest==1&filer_status==-9&temp_Nfilers==0
	// Now we have some stray clean up
		cap drop temp_filer
		qui gen temp_filer = inlist(filer_status,1,2,3)
		cap drop temp_Nfilers
		qui egen temp_Nfilers=total(temp_filer), by(yof h_seq taxunit)
		// Diagnostic cross-tab (filer_stat is an abbreviation of filer_status)
		tab filer_stat temp_Nfilers, mi
		// Assign dependents now that we have primary filers in new HHs
		// (everyone still unassigned becomes a dependent filer)
		qui replace filer_stat=4 if filer_stat==-9
		// ym1 = tax year; only used by the commented-out report below
		loc ym1 = `y'-1
		/*
		di "===================================="
		di " Tax year `ym1'"
		di "===================================="
		tab filer_stat
		*/

	// -------------------------------------------------- 3. Individual income sources:
	// This is just a rough approximation for the imputation deduction, actual AGI is done
	// on NBER's end
	// Every *_i variable below is person level. Through 1987 the inputs are the i5* items,
	// from 1988 on the *_val*_tc items.

		// Wages: wage/salary plus self-employment and farm earnings when both of the latter
		// are non-negative; wage/salary alone when either of them is negative.
		cap drop wages_i
		qui gen wages_i=.
		if `y'<=1987 {
			qui replace wages_i = i51a + i51b + i51c if i51b>=0&i51c>=0
			qui replace wages_i = i51a if i51b<0|i51c<0
		}
		else {
			qui replace wages_i = ws_val_tc+se_val_tc+frm_val_tc if se_val_tc>=0&frm_val_tc>=0
			qui replace wages_i = ws_val_tc if se_val_tc<0|frm_val_tc<0
		}
		label var wages_i "Individual wages"
		// We need an extra step here to keep track of which earnings are self employment!
		// (se_wages_i stays missing when either component is negative)
		cap drop se_wages_i 
		if `y'<=1987 qui gen se_wages_i = i51b + i51c if i51b>=0&i51c>=0
		else qui gen se_wages_i = se_val_tc+frm_val_tc if se_val_tc>=0&frm_val_tc>=0
		
		
		// Dividends
		cap drop dividends_i
		qui gen dividends_i=.
		if `y'<=1987 qui replace dividends_i=i53c // this also includes rental income until 1988
		else qui replace dividends_i=div_val_tc
		label var dividends_i "Individual dividend income"
		
		// Other property income
		// - post 1987 we have rent and interest separate from dividends
		// - when either SE component is negative, both SE components are added here
		//   (these are the cases that were left out of wages_i above)
		cap drop otherprop_i
		if `y'<=1987 {
			qui gen otherprop_i=i53b
			qui replace otherprop_i=otherprop_i+i51b+i51c if i51b<0|i51c<0
		}
		else {
			qui gen otherprop_i=int_val_tc + rnt_val_tc
			qui replace otherprop_i=otherprop_i+se_val_tc+frm_val_tc if se_val_tc<0|frm_val_tc<0
		}
		label var otherprop_i "Individual other property income"
		
		// Pensions
		cap drop pensions_i
		if `y'<=1987 qui gen pensions_i=i53e 
		else qui gen pensions_i=ret_val1_tc + ret_val2_tc
		label var pensions_i "Individual pension income"
		
		// SS
		cap drop gssi_i
		if `y'<=1987 qui gen gssi_i = i52a+i52b // Social security + SSI
		else {
			qui gen gssi_i = ss_val_tc+ssi_val_tc+sur_val1_tc+sur_val2_tc+dis_val1_tc+dis_val2_tc
		}
		label var gssi_i "Individual gross SS"
			// Post 1987 we have survivor and disability benefits
		
		// Individual transfer income
		cap drop transfers_i
		if `y'<=1987 qui gen transfers_i=i53a
		else qui gen transfers_i=paw_val_tc+wc_val_tc+vet_val_tc+csp_val_tc
		
		// UI - pre 1988, this also has Vet and Worker's comp
		cap drop ui_i
		if `y'<=1987 qui gen ui_i=i53d
		else qui gen ui_i=uc_val_tc
		/* Just like Judith Scott-Clayton, we ignore
		- Educational assistance
		- Alimony
		- Contrib/financial asst
		- Misc (other) income
		*/
		
		// Dummies for SE income (+/-)
		// NOTE(review): these use i51b/i51c in every year, while the rest of this section
		// switches to se_val_tc/frm_val_tc after 1987. A missing value counts as !=0 in
		// Stata, so if the i5* items are missing in later years the dummies are 1 for
		// everyone -- confirm. Neither dummy is in the final keep list of this file.
		cap drop selfemp
		qui gen selfemp=i51b!=0
		cap drop selfemp2
		qui gen selfemp2=i51b!=0|i51c!=0 // also farm income
		

	
		
		// 2nd tax unit identifier (this one separates out dependent filers)
		// Dependent filers (status 4) get taxunit + 100 + their position within the
		// household; everyone else keeps the original taxunit value.
		cap drop temp1
		cap drop temp2
		cap drop taxunit2
		qui gen temp1 = taxunit
		bys yof h_seq: gen temp2=_n // used to be by (yof h_seq taxunit) but that can actually create conflicts: see yof 2003 h_seq 33250
		qui replace temp1 = temp1+100+temp2 if filer_status==4 // use taxunit to count dependents
		rename temp1 taxunit2								   // use taxunit2 to collapse
															
				
	// -------------------------------------------------- 4. Here we deal with the SE problem for the fica calculation
	// Builds wage and self-employment bases per taxunit2 (temp_p* = primary, temp_s* =
	// secondary), merges in the payroll-tax parameters by yof, and computes hi_bill,
	// oasdi_bill, fica_bill and fica_deduction.
		cap drop temp_pwage // non-se wages
		cap drop temp_pse   // se wages
		cap drop temp_swage
		cap drop temp_sse
		if `y'<=1987 {
			qui gen temp_pwage = i51a if inlist(filer_stat,1,2,3,4)
			// FIX: this used to require i51b>0&i51c>0, which dropped the SE base for anyone
			// with exactly one SE component equal to zero. It now uses >=0, matching the
			// post-1987 branch below and the definition of se_wages_i.
			qui gen temp_pse = i51b+i51c if i51b>=0&i51c>=0&inlist(filer_stat,1,2,3,4)
		}
		else {
			qui gen temp_pwage = ws_val_tc if inlist(filer_stat,1,2,3,4)
			qui gen temp_pse = se_val_tc+frm_val_tc if se_val_tc>=0&frm_val_tc>=0&inlist(filer_stat,1,2,3,4)
		}
		// Non-filers and people with a negative SE component count as zero
		foreach v of varlist temp_pwage temp_pse {
			qui replace `v' = 0 if `v'==.
		}
		// Within each taxunit2 and among married filers (status 1) only: the larger amount
		// becomes the primary value and the smaller one the secondary value, for the whole
		// unit. Units without a status-1 person keep their own temp_p* and get temp_s*=0.
		foreach t in wage se {
			cap drop temp1
			cap drop temp2
			cap drop temp3
			qui gen temp1 = temp_p`t' if filer_stat==1
			qui egen temp2 = max(temp1), by(yof h_seq taxunit2)
			qui egen temp3 = min(temp1), by(yof h_seq taxunit2)
			cap drop temp_s`t' 
			qui gen temp_s`t'=0
			qui replace temp_s`t'= temp3 if temp3<.
			qui replace temp_p`t' = temp2 if temp2<.
		}
		// Payroll-tax parameters by year; rows without a match on either side are dropped.
		// (maxbase, hirate and oasdirate are presumably supplied by this file -- none of
		//  them is created in this do-file.)
		cap drop _merge
		merge m:1 yof using CPS_taxsim_ssadata
		drop if _merge<3
		// oasdi rates are given here, but we need to code up medicare hospital insurance
		rename maxbase oasdi_max
		cap drop hi_max
		qui gen hi_max = oasdi_max if yof<=1991
		qui replace hi_max = 125000 if yof==1992
		qui replace hi_max = 130200 if yof==1993
		qui replace hi_max = 135000 if yof==1994
		qui replace hi_max = . if yof>1994 // infinity (no cap)
		// Code up the wage bases
		foreach t in hi oasdi {
			cap drop `t'wages
			qui gen `t'wages=.
			qui replace `t'wages = temp_pwage + temp_swage + temp_pse + temp_sse
			// min() ignores a missing argument, so a missing hi_max means "no cap"
			qui replace `t'wages = min(`t'wages,`t'_max) if `t'wages<.
			// Share of the (capped) base that is non-SE wages. For a zero base the ratio is
			// missing and the min() below turns it into 1; the bill is zero either way.
			cap drop prop_`t'nonse
			qui gen prop_`t'nonse = (temp_pwage + temp_swage)/`t'wages
			qui replace prop_`t'nonse = min(prop_`t'nonse,1)
			// Full rate on the SE share, half the rate on the non-SE share
			cap drop `t'_bill
			qui gen `t'_bill = (`t'rate*`t'wages)*(1-prop_`t'nonse) + .5*(`t'rate*`t'wages*prop_`t'nonse)
		}
		cap drop fica_bill
		qui gen fica_bill = hi_bill+oasdi_bill
		// Half-rate amount on the non-SE share of both bases
		cap drop fica_deduction
		qui gen fica_deduction = .5*(hirate*hiwages*prop_hinonse) + .5*(oasdirate*oasdiwages*prop_oasdinonse)

		
		// Distribute wages of primary and secondary
		// pwages: own wages_i for every filer (status 1-4); for married filers the larger
		//         wages_i among the status-1 people of the (broad) tax unit.
		// swages: 0, except for married filers, who get the smaller wages_i of the couple.
		// NOTE(review): if a tax unit holds only ONE status-1 person, min and max coincide
		// and that person's wages appear in both pwages and swages -- confirm this cannot occur.
		cap drop pwages
		cap drop swages
		qui gen pwages = wages_i if inlist(filer_stat,1,2,3,4) 
		cap drop temp1
		cap drop temp2
		cap drop temp3
		qui gen temp1 = wages_i if filer_stat==1 // we only worry about swages for married folks
		qui egen temp2 = min(temp1), by(yof h_seq taxunit)
		qui egen temp3 = max(temp1), by(yof h_seq taxunit) // we don't have to worry about anyone but the primary 
		qui replace pwages = temp3 if filer_stat==1 	 	// filers so we can use the broad TU here (not super clean but we want to avoid re-running at this point)
		qui gen swages=0
		qui replace swages = temp2 if filer_stat==1
			// Take fica_deduction out of pwages/swages in proportion to each one's share
			// of total wages.
			//
			// FIX: with tot_wages==0 the share is 0/0 = missing, which used to turn pwages
			// and swages (and therefore agi) into missing for every filer without wages.
			// The asserts below cannot catch that because missing compares as >=0. The
			// deduction is now skipped when there are no wages to deduct from. Rows with
			// missing tot_wages (non-filers) behave exactly as before.
			cap drop tot_wages
			qui gen tot_wages = pwages + swages 
			cap drop prop_pwage 
			qui gen prop_pwage = pwages/tot_wages
			qui replace pwages = pwages - (prop_pwage)*fica_deduction if tot_wages!=0
			qui replace swages = swages - (1-prop_pwage)*fica_deduction if tot_wages!=0 // here we take the fica deduction out!
			assert pwages>=0
			assert swages>=0




		// Do it again this time with self-employment income 
		// se_wages: total se_wages_i of the filers (status 1-4) in each taxunit2
		cap drop temp1
		qui gen temp1 = se_wages_i if inlist(filer_stat,1,2,3,4)
		qui egen se_wages = total(temp1), by(yof h_seq taxunit2)
	


	// -------------------------------------------------- 5. TU level income coding
	// Aggregates the person-level *_i components to taxunit2, builds a rough AGI for the
	// imputation, deflates it to 1999 dollars and bins it into y_group.
		
		loc vl1 "dividends_i otherprop_i pensions_i gssi_i transfers_i ui_i"
		foreach v in `vl1' {
			loc v2=subinstr("`v'","_i","",.)
			// temp = the component for primary filers (status 1/2/3) only
			cap drop temp
			qui gen temp=`v' if inlist(filer_status,1,2,3)
			// FIX: the total used to be taken over `v' itself, which left temp unused and
			// pulled non-filers' amounts into the unit total. It is now taken over temp;
			// dependent filers (own taxunit2) get their own amount on the next line.
			// (No "cap drop `v2'" on purpose: with variable abbreviation on, e.g.
			//  "drop ui" could resolve to ui_i.)
			qui egen `v2' = total(temp), by(yof h_seq taxunit2)
			qui replace `v2'=`v' if filer_status==4
		}
		
		cap drop alimony
		qui egen alimony = total(alm_val_tc), by(yof h_seq taxunit2)
		// We're going to let taxsim create the AGI.
		// Create an agi measure - not the official one, just something to use
		// for the imputation
		cap drop agi
		qui gen agi = pwages + swages + dividends + otherprop + pensions + ///
						gssi + alimony //above + alimony - transfers -ui
		
		
		// Create a new y-group for imputation
		// One extra observation with year==1999 (recognisable by ppos==.) is appended so
		// that the CPI series can be rebased to 1999=1; it is dropped again right after.
		// getcpi is a user-written command; presumably it creates `cpi' from `year'.
		cap drop cpi // all incomes are adjusted to 1999 dollars
		set obs `=`=_N'+1'
		cap drop year
		qui gen year = `y'-1 // y is yof
		qui replace year=1999 if ppos==.
		getcpi, year(year) gen(cpi)
		qui summ cpi if year==1999, mean
		qui replace cpi=cpi/r(mean)
		drop if ppos==.
		// agi_orig keeps the nominal amount; agi becomes the rounded deflated amount
		rename agi agi_orig
		qui gen agi=agi_orig
		qui replace agi=round(agi/cpi)	
		cap drop y_group
		qui summ agi
		local magi=r(max)
		// Values outside the listed ranges (e.g. negative agi) are copied to y_group unchanged
		recode agi  (0/9999=0) (10000/19999=1) (20000/29999=2) (30000/39999=3) ///
			   (40000/49999=4) (50000/59999=5) (60000/69999=6) (70000/79999=7) ///
			   (80000/89999=8) (90000/99999=9) (100000/124999=10) (125000/149999=11) ///
			   (150000/174999=12) (175000/199999=13) (200000/`magi'=14), gen(y_group)
		
		

	// -------------------------------------------------- 6. Impute itemized deductions	

		
		// Prep the two lookup tables, one row per y_group:
		//   share.dta       -> share_itemizers (tempfile `share')
		//   item_adgrin.dta -> share_agi       (tempfile `sh_agi')
		// Years after 2006 reuse the 2006 row (bring forward 2006 values).
		local yk=min(`y',2006)
		local lookup_files "share item_adgrin"
		local lookup_vars "share_itemizers share_agi"
		local lookup_tmps "share sh_agi"
		forvalues j=1/2 {
			local src : word `j' of `lookup_files'
			local outvar : word `j' of `lookup_vars'
			local tmpname : word `j' of `lookup_tmps'
			preserve
				use `src', clear
				cap drop yr
				qui gen yr=real(_rowname)
				drop _rowname
				qui keep if yr==`yk'
				qui replace yr=`y'
				reshape long S, i(yr) j(temp)
				rename S `outvar'
				rename temp y_group
				tempfile `tmpname'
				save ``tmpname'', replace
			restore
		}

		// Get imputed values for itemized deductions!
		// NOTE(review): no seed is set in this file, so the draws below are only
		// reproducible if the caller sets one -- confirm.
		preserve
			keep if inlist(filer_status,1,2,3,4)
			// Only keep one of the married folks (they only file 1 return)
			cap drop temp_N
			cap drop temp_n
			bys h_seq taxunit2: gen temp_N=_N
			bys h_seq taxunit2: gen temp_n=_n
			qui keep if temp_n==1
			
			// Bring in the share data
			cap drop _merge
			merge m:1 y_group using `share'
			tab y_group _merge
			list if _merge==2
			* bys _merge: summ y_group, d
			* bys _merge: summ agi, d
			keep if _merge==3 // this is just folks who make less than 10000
			drop _merge
			
			// Bring in the prop income data
			cap drop _merge
			merge m:1 y_group using `sh_agi'
			tab y_group _merge
			keep if _merge==3
			drop _merge
			
			// Shuffle the rows on one uniform draw, then take a second draw that decides
			// who itemizes
			cap drop temp_rand1
			qui gen temp_rand1=runiform()
			sort temp_rand1
			cap drop temp_rand2
			qui gen temp_rand2=runiform()
			
			// A unit itemizes when its draw does not exceed the group's share of itemizers;
			// itemizers receive the group's share_agi, everybody else 0
			cap drop itemizer
			qui gen itemizer=temp_rand2<=share_itemizers
			summ itemizer, d
			cap drop itemized_share
			qui gen itemized_share=cond(itemizer==1,share_agi,0)
			* bys itemizer: summ itemized_share, d
			duplicates report h_seq taxunit2
			keep yof h_seq taxunit2 itemized_share
			tempfile item_deduc
			save `item_deduc', replace
		restore
		
		// Hand itemized_share back to every person of the unit. Anyone left without a
		// match must have agi below 10000 (or missing agi).
		cap drop _merge
		merge m:1 yof h_seq taxunit2 using `item_deduc'
		assert agi<10000 if _merge<3&agi<.
		drop _merge

		// Put the agi back to current year dollars and create a table for benchmarking against SOI
		// Display only: everything happens between preserve/restore, so the data that get
		// saved are not affected.
		preserve
			// FIX: the old code rebased the CPI to the tax year itself, so cpi was 1 for
			// every person and agi/cpi left agi in 1999 dollars. agi_orig already holds the
			// current-year amount; it is rounded so that it falls inside the integer recode
			// ranges below.
			qui replace agi=round(agi_orig)
			qui summ agi
			// Create a new y_group for the benchmark table
			local magi=r(max)
			cap drop y_group
			recode agi  (0/4999=0) (5000/9999=1) (10000/14999=2) (15000/19999=3) ///
				   (20000/24999=4) (25000/29999=5) (30000/39999=6) (40000/49999=7) ///
				   (50000/74999=8) (75000/99999=9) (100000/199999=10) (200000/499999=11) ///
				   (500000/999999=12) (1000000/1499999=13) (1500000/`magi'=14), gen(y_group)			   
			
			// deduc_amt : deduction amount for itemizers, 0 for units without an imputed
			//             share, missing for non-itemizers (so they do not enter the mean)
			// deduc_item: 1 for itemizers, 0 otherwise
			cap drop deduc_amt
			qui gen deduc_amt = itemized_share*agi if itemized_share>0&itemized_share<.
			qui replace deduc_amt=0 if itemized_share==.
			// FIX: a missing itemized_share compares as >0, so units without an imputed share
			// used to be counted as itemizers. Guarded the same way as deduc_amt above.
			qui gen deduc_item = itemized_share>0&itemized_share<.
			// FIX: restrict to filers BEFORE taking one row per unit (same order as in the
			// imputation step). Previously the single row kept for a unit could be a
			// non-filer, which then removed the whole unit from the table.
			keep if inlist(filer_status,1,2,3,4)
			bys h_seq taxunit2: keep if _n==1
			collapse (mean) deduc_* , by(y_group)
			noi di "Deductions"
			noi list if y_group>=0, clean noobs // this is our breakdown of deductions
		restore

	// Reduce the year file to the identifiers and the variables built above, then
	// store it as temp_tukey<year>
	keep yof h_seq ppos taxunit* *_i agi* y_group ///
		*wages* filer_stat fica* *bill
	save temp_tukey`y', replace
	}
	
	
	// Stack all yearly files, starting from the first year, into the final file
	local first_year 1986
	local next_year = `first_year'+1
	use temp_tukey`first_year', clear
	forvalues yr=`next_year'/2012 {
		append using temp_tukey`yr'
	}
	save CPS_taxsim_tukey2, replace
}
